#################################################################################################
#This file conducts the analysis on the litigation and credit claiming paper.
#################################################################################################
# Set the working directory.
# The path below is the original author's Dropbox location. An unconditional
# setwd() aborts the script on any machine where that folder does not exist,
# so the directory is only changed when it is present. Otherwise the current
# working directory is used, and it must already contain ./data, ./Tables and
# ./Figures (the relative paths used throughout this script).
replication_dir <- "~/Dropbox/LobbyingInsideOut/LitigationPaper/JLC replication"
if (dir.exists(replication_dir)) {
  setwd(replication_dir)
} else {
  message("Replication folder not found; using working directory: ", getwd())
}

# Clear the working environment (this also removes replication_dir, which is
# not needed after this point).
rm(list = ls())

#load packages  
library(tidyverse)
library(ggplot2)
library(readxl)
library(data.table)
library(stargazer)
library(xtable)

##################################################################
# Read in data
##################################################################
# Federally focused organizations that have a usable Facebook handle.
# Both spellings of a missing handle ("na" and "") are converted to NA *before*
# the filter, so blank handles are dropped here just as they are when
# amicus_orgs is built. Previously "" was only recoded after the filter, which
# let blank-handle rows through carrying an NA handle.
federal_orgs <- read.csv("./data/federally_focused_orgs.csv") %>%
  mutate(
    federal = 1,
    facebook.handle.final = ifelse(
      facebook.handle.final %in% c("na", ""), NA, facebook.handle.final
    )
  ) %>%
  filter(!is.na(facebook.handle.final)) %>%
  distinct() %>%
  # Normalise Twitter handles (lower case, first "@" removed, whitespace
  # trimmed) and organization names ("&" -> "and", lower case).
  dplyr::mutate(
    twitter.handle.final = tolower(twitter.handle.final),
    twitter.handle.final = str_replace(twitter.handle.final, "\\@", ""),
    twitter.handle.final = str_trim(twitter.handle.final),
    Organization = gsub("&", "and", organizationNames),
    Organization = tolower(Organization)
  )
# Amicus organizations flagged for use (use == 1) that have a Facebook handle.
# Twitter handles are lower-cased, stripped of their first "@" and trimmed;
# organization names get "&" -> "and" and are lower-cased. Blank Facebook
# handles become NA and those rows are removed after de-duplication.
amicus_orgs <- fread("./data/AmiciOrgsClean.csv") %>%
  filter(use == 1) %>%
  mutate(amicus = 1) %>%
  dplyr::mutate(
    twitter.handle.final = str_trim(
      str_replace(tolower(twitter.handle), "\\@", "")
    ),
    Organization = tolower(gsub("&", "and", organizationNames)),
    facebook.handle.final = ifelse(
      facebook.handle.final == "", NA, facebook.handle.final
    )
  ) %>%
  distinct() %>%
  filter(!is.na(facebook.handle.final))
# Combine the two organization lists, matching on both handles, Type and
# LegalOrg.
#   og_only = 1 when a row comes from the federal list and has no amicus match;
#   amicus  = 0 for rows with no amicus match.
# Missing values are then filled (down, then up) within each Facebook/Twitter
# handle pair, duplicates are dropped, and rows with Type "Think Tank" or a
# Facebook handle of "0" are removed.
# NOTE(review): the result is still grouped by the two handle columns (there is
# no ungroup()) -- confirm that any later use expects a grouped table.
all_orgs <- federal_orgs %>%
  full_join(
    amicus_orgs,
    by = c("facebook.handle.final", "twitter.handle.final", "Type", "LegalOrg")
  ) %>%
  mutate(
    og_only = ifelse(federal == 1 & is.na(amicus), 1, 0),
    amicus = ifelse(is.na(amicus), 0, amicus)
  ) %>%
  select(
    Type, amicus, og_only, twitter.handle.final, LegalOrg,
    facebook.handle.final
  ) %>%
  group_by(facebook.handle.final, twitter.handle.final) %>%
  fill(everything(), .direction = "updown") %>%
  distinct() %>%
  filter(Type != "Think Tank", facebook.handle.final != "0")

### Read in files
# Post-level inputs used for the tables and figures below. Objects prefixed
# ct_ are treated as Facebook data and objects prefixed twitter_ as Twitter
# data later in this script.

# Full post data, amicus and federal (national) datasets
ct_amicus_orgs_full <- data.table::fread(
  "./data/ct_amicus_orgs_full_JLC.csv"
)
ct_federal_orgs_full <- data.table::fread(
  "./data/ct_federal_orgs_full_JLC.csv"
)
twitter_federal_orgs_full <- data.table::fread(
  "./data/twitter_federal_orgs_full_JLC.csv"
)
twitter_amicus_orgs_full <- data.table::fread(
  "./data/twitter_amicus_orgs_full_JLC.csv"
)

# Posts matched to cases (used for the timing figures)
ct_fuzzy <- data.table::fread(
  "./data/ct_fuzzyjoin_scotus_full_JLC.csv"
)

# Dictionary-coded post data (used for Table 3)
ct_amicus_orgs_dictionary <- data.table::fread(
  "./data/ct_amicus_orgs_dictionary_full_JLC.csv"
)
ct_federal_orgs_dictionary <- data.table::fread(
  "./data/ct_federal_orgs_dictionary_full_JLC.csv"
)

##################################################################
# Tables
##################################################################
#Table 1: number of groups by type
#federal (~1700)
# Number of distinct (Facebook handle, Twitter handle, Type) combinations per
# Type in the federally focused list. No filter on the Facebook handle is
# applied here, so organizations without one are counted too.
federal_orgs_number <- read.csv("./data/federally_focused_orgs.csv") %>%
  mutate(
    federal = 1,
    facebook.handle.final = ifelse(
      facebook.handle.final == "na", NA, facebook.handle.final
    )
  ) %>%
  distinct() %>%
  dplyr::mutate(
    twitter.handle.final = str_trim(
      str_replace(tolower(twitter.handle.final), "\\@", "")
    ),
    Organization = tolower(gsub("&", "and", organizationNames)),
    facebook.handle.final = ifelse(
      facebook.handle.final == "", NA, facebook.handle.final
    )
  ) %>%
  select(facebook.handle.final, twitter.handle.final, Type) %>%
  distinct() %>%
  group_by(Type) %>%
  tally()
federal_orgs_number
#amici dataset (~2700)
# Number of distinct (Facebook handle, Twitter handle, Type) combinations per
# Type among amicus organizations flagged for use (use == 1).
# NOTE(review): the final select() keeps the raw `twitter.handle` column, not
# the normalised `twitter.handle.final` built just above (the federal count
# uses the normalised column). Handles that differ only by case, "@" or
# whitespace are therefore counted separately -- confirm this is intended
# before changing it, since it affects the reported counts.
amicus_orgs_number <- fread("./data/AmiciOrgsClean.csv") %>%
  filter(use == 1) %>%
  mutate(amicus = 1) %>%
  dplyr::mutate(
    twitter.handle.final = str_trim(
      str_replace(tolower(twitter.handle), "\\@", "")
    ),
    Organization = tolower(gsub("&", "and", organizationNames)),
    facebook.handle.final = ifelse(
      facebook.handle.final == "", NA, facebook.handle.final
    )
  ) %>%
  distinct() %>%
  select(facebook.handle.final, twitter.handle, Type) %>%
  distinct() %>%
  group_by(Type) %>%
  tally()
amicus_orgs_number

#Table 2: summary statistics
# Number of distinct Facebook pages (Page.Name) and Twitter handles (handle) in
# the national data with at least one post of each kind. Each count is a bare
# top-level expression.

# Posts matching any dictionary (case, court or amicus words)
dict_words <- ct_federal_orgs_full %>%
  mutate(dict_words = case_words + court_words + amicus_words) %>%
  filter(dict_words > 0)
n_distinct(dict_words$Page.Name)

dict_words_twitter <- twitter_federal_orgs_full %>%
  mutate(dict_words = case_words + court_words + amicus_words) %>%
  filter(dict_words > 0)
n_distinct(dict_words_twitter$handle)

# Amicus-word posts, excluding those flagged amicus_to_remove
amicus_words <- ct_federal_orgs_full %>%
  filter(amicus_words == 1, amicus_to_remove == 0)
n_distinct(amicus_words$Page.Name)

amicus_words_twitter <- twitter_federal_orgs_full %>%
  filter(amicus_words == 1, amicus_to_remove == 0)
n_distinct(amicus_words_twitter$handle)

# Case-name posts
case_words <- ct_federal_orgs_full %>%
  filter(case_words == 1)
n_distinct(case_words$Page.Name)

case_words_twitter <- twitter_federal_orgs_full %>%
  filter(case_words == 1)
n_distinct(case_words_twitter$handle)

# Build one row of Table 2 from a post-level dataset.
#
# posts:    post-level data with columns court_words, amicus_words,
#           amicus_to_remove and case_words (summed as counts, NAs ignored).
# platform: label stored in the Platform column (e.g. "Facebook").
# dataset:  label stored in the Dataset column (e.g. "National").
#
# Returns a one-row data.frame with the court, amicus (net of posts flagged
# amicus_to_remove) and case-name post counts, the total number of posts, and
# the two labels. The same computation was previously written out four times.
summarise_post_counts <- function(posts, platform, dataset) {
  posts %>%
    dplyr::summarise(
      `Court Word Posts` = sum(court_words, na.rm = TRUE),
      `Amicus Word Posts` = sum(amicus_words, na.rm = TRUE) -
        sum(amicus_to_remove, na.rm = TRUE),
      `Case Name Posts` = sum(case_words, na.rm = TRUE),
      `Total Posts` = n()
    ) %>%
    as.data.frame() %>%
    mutate(Platform = platform, Dataset = dataset)
}

sum_ct <- summarise_post_counts(
  ct_federal_orgs_full, "Facebook", "National"
)
sum_twitter <- summarise_post_counts(
  twitter_federal_orgs_full, "Twitter", "National"
)
sum_ct_amicus <- summarise_post_counts(
  ct_amicus_orgs_full, "Facebook", "Amicus"
)
sum_twitter_amicus <- summarise_post_counts(
  twitter_amicus_orgs_full, "Twitter", "Amicus"
)

# Stack the four rows (national first, then amicus) and write the LaTeX table;
# the horizontal rules separate the header, the national rows and the amicus
# rows.
sum_all <- as.data.frame(
  rbind(sum_ct, sum_twitter, sum_ct_amicus, sum_twitter_amicus)
)
print(
  xtable(
    sum_all,
    caption = "Interest group summary statistics in the national and amicus datasets, January 1, 2016 to December 31, 2020 on Facebook and Twitter.",
    label = "summarystats"
  ),
  file = "Tables/Table2.tex",
  include.rownames = FALSE,
  hline.after = c(0, 2, 4)
)

#Table 3: summarytableaverages
# Coerce the two audience-size columns of the amicus dictionary data to
# integer. They are dropped from the amicus side of the join just below, so the
# joined table keeps only the federal data's versions of these columns.
ct_amicus_orgs_dictionary$Likes.at.Posting <- as.integer(ct_amicus_orgs_dictionary$Likes.at.Posting)
ct_amicus_orgs_dictionary$Followers.at.Posting <- as.integer(ct_amicus_orgs_dictionary$Followers.at.Posting)

# Join the amicus and federal dictionary data on all columns they share (no
# `by` is given), de-duplicate, and classify each post by whether its
# organization is a legal organization and whether it is federal-only
# (og_only). Posts that fit none of the four classes get NA and are dropped
# from the final table.
summary.table <- ct_amicus_orgs_dictionary %>%
  select(-Likes.at.Posting, -Followers.at.Posting) %>%
  full_join(ct_federal_orgs_dictionary) %>%
  distinct() %>%
  mutate(
    Group = case_when(
      LegalOrg == 1 & og_only == 0 ~ "Legal Amicus",
      LegalOrg == 1 & og_only == 1 ~ "Legal Non-Amicus",
      LegalOrg == 0 & og_only == 1 ~ "Non-Legal Non-Amicus",
      LegalOrg == 0 & og_only == 0 ~ "Non-Legal Amicus"
    )
  ) %>%
  group_by(Group) %>%
  dplyr::summarise(
    `Number of Groups` = n_distinct(Page.Name),
    `Total Posts` = n(),
    `Amicus Posts` = sum(amicus_words, na.rm = TRUE) -
      sum(amicus_to_remove, na.rm = TRUE)
  ) %>%
  mutate(`Percent Amicus Posts` = (`Amicus Posts` / `Total Posts`) * 100) %>%
  filter(!is.na(Group))

# Write the LaTeX table: counts with no decimals, the percentage with two.
print(
  xtable(
    summary.table,
    caption = "Amicus posts by group type on Facebook, alongside the percent of posts by group type about amicus terms.",
    label = "summarytableaverages",
    digits = c(0, 0, 0, 0, 0, 2)
  ),
  file = "Tables/Table3.tex",
  include.rownames = FALSE
)

##################################################################
# Figures
##################################################################
#Figure 2: facebookweeklycourtspostsavgbygroup and twitterweeklycourtspostsavgbygroup
# Facebook posts in the national data that match the court dictionary and are
# not flagged courts_to_remove, restricted to rows with a non-missing,
# non-empty organization Type.
facebookcourts <- ct_federal_orgs_full %>%
  filter(court_words == 1, courts_to_remove == 0) %>%
  mutate(Post = "Courts", Platform = "Facebook") %>%
  filter(!is.na(Type), Type != "")

# Court posts per month and organization Type, divided by the number of
# distinct pages in facebookcourts.
# The denominator is a single constant across all Types, so it is computed once
# here. The earlier version carried it as a column, which made summarise()
# return one row per post (deprecated since dplyr 1.1.0) that then had to be
# collapsed with unique(). The values are unchanged; the result now has exactly
# one row per month/Type and is returned ungrouped.
# NOTE(review): the denominator counts pages of every Type, not pages within
# each Type -- confirm this matches the "per group type" wording of the figure.
court_page_total <- n_distinct(facebookcourts$Page.Name)

facebook.group.court <- facebookcourts %>%
  group_by(month = cut(as.Date(Created), "month"), Type) %>%
  dplyr::summarise(postsbygroup = n() / court_page_total, .groups = "drop")

# Figure 2 (Facebook): one point per month and organization Type, grey palette,
# log10 y axis with hand-picked breaks, saved as a PDF.
ggplot(
  facebook.group.court,
  aes(x = as.Date(month), y = postsbygroup, color = Type, shape = Type)
) +
  geom_point(size = 3) +
  scale_colour_grey(start = 0.1, end = 0.7) +
  xlab("Date of Post Creation") +
  ylab("Average Monthly Courts Posts Per Group Type") +
  # NOTE(review): theme_bw() further down is a complete theme and is added
  # after these two legend settings, so it appears to reset them. They are left
  # in place so that the saved figure does not change.
  theme(legend.text = element_text(size = 14)) +
  theme(legend.key = element_rect(fill = "white")) +
  scale_alpha(guide = "none") +
  scale_y_continuous(
    trans = "log10",
    breaks = c(0, 0.1, 0.2, 0.3, 0.5),
    # Spelled out in full: the previous `label =` only worked through partial
    # matching of the `labels` argument.
    labels = c("0", "0.1", "0.2", "0.3", "0.5")
  ) +
  theme_bw() +
  ggtitle("Facebook") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggsave("./Figures/Figure2.pdf", height = 6, width = 8)

#Figure 3: percpostsbylegal and percpostsbylegal_twitter
# Share of Facebook posts, within legal and within non-legal organizations,
# that match each dictionary. Each share is computed on its own base: court
# posts exclude rows flagged courts_to_remove, amicus posts exclude rows
# flagged amicus_to_remove, and case-name posts use every row with a known
# LegalOrg.
legal_posts <- ct_federal_orgs_full %>%
  filter(!is.na(LegalOrg))

court_share <- legal_posts %>%
  filter(courts_to_remove == 0) %>%
  count(LegalOrg, court_words) %>%
  group_by(LegalOrg) %>%
  mutate(freq = n / sum(n)) %>%
  filter(court_words == 1) %>%
  mutate(Type = "Court")

amicus_share <- legal_posts %>%
  filter(amicus_to_remove == 0) %>%
  count(LegalOrg, amicus_words) %>%
  group_by(LegalOrg) %>%
  mutate(freq = n / sum(n)) %>%
  filter(amicus_words == 1) %>%
  mutate(Type = "Amicus")

case_share <- legal_posts %>%
  count(LegalOrg, case_words) %>%
  group_by(LegalOrg) %>%
  mutate(freq = n / sum(n)) %>%
  filter(case_words == 1) %>%
  mutate(Type = "Case Names")

plot_legal <- dplyr::bind_rows(court_share, amicus_share, case_share)

# Dodged bars: post type on the x axis, one bar per LegalOrg value.
ggplot(
  plot_legal,
  aes(x = as.factor(Type), y = freq, fill = as.factor(LegalOrg))
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_grey(start = 0.2, end = .7) +
  labs(
    title = "Facebook",
    x = "Post Type",
    y = "Percent All Posts",
    fill = "Legal Organization"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggsave("./Figures/Figure3.pdf", height = 6, width = 8)

#Figure 5: differencedaysdecision and differencedaysdecision_twitter
# Attach the organization/case pairs to the case-matched posts, then compute,
# for each post, the number of days between the post date and the case's cert,
# argument and decision dates (positive = post made after that date).
amici_pairs <- read.csv("./data/AmiciOrgsPairs.csv")

time <- ct_fuzzy %>%
  left_join(
    amici_pairs,
    by = c(
      "docket.x" = "docket",
      "User.Name" = "facebook.handle.final",
      "LegalOrg" = "LegalOrg"
    )
  ) %>%
  mutate(
    # Post.Created is a Date from here on, so it is used directly below.
    Post.Created = as.Date(Post.Created),
    `Cert Date` = as.numeric(
      Post.Created - as.Date(as.character(certdate), format = "%Y-%m-%d")
    ),
    `Argument Date` = as.numeric(
      Post.Created - as.Date(as.character(argued), format = "%Y-%m-%d")
    ),
    `Decided Date` = as.numeric(
      Post.Created - as.Date(as.character(opinion), format = "%Y-%m-%d")
    )
  )

# Long format: one row per post and reference date, with the reference date
# stored as a factor in column order (Cert, Argument, Decided).
data_long <- time %>%
  gather(
    `Difference From`, difference, `Cert Date`:`Decided Date`,
    factor_key = TRUE
  )

# Figure 5 (Facebook): density of the post-minus-date difference in days, one
# curve per reference date, with a vertical line at zero. Both axes are
# limited (x to +/- 500 days, y to 0-0.008).
ggplot(
  data_long,
  aes(x = difference, color = `Difference From`, linetype = `Difference From`)
) +
  geom_density(size = 1.2) +
  geom_vline(xintercept = 0) +
  scale_color_grey(start = .1, end = .8) +
  scale_linetype_manual(
    values = c(
      "Cert Date" = "dashed",
      "Argument Date" = "twodash",
      "Decided Date" = "dotted"
    )
  ) +
  xlim(-500, 500) +
  ylim(0, 0.008) +
  labs(title = "Facebook", x = "Difference (Days)", y = "Density") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggsave("./Figures/Figure5.pdf", height = 6, width = 8)

#Figure 6: differencedaysdecisionbylegalorg and differencedaysdecision_legalorg_twitter
# Same densities as Figure 5, drawn in separate panels for legal and non-legal
# organizations (rows with a missing LegalOrg are excluded); y limited to
# 0-0.01.
data_long %>%
  filter(!is.na(LegalOrg)) %>%
  mutate(
    Legal = case_when(
      LegalOrg == 1 ~ "Legal Org",
      LegalOrg == 0 ~ "Not Legal Org"
    )
  ) %>%
  ggplot(
    aes(x = difference, color = `Difference From`, linetype = `Difference From`)
  ) +
  geom_density(size = 1.2) +
  geom_vline(xintercept = 0) +
  # ggtitle("Difference in Days from Post to Days of \n Different Court Decisions by Group Type")+
  facet_wrap(~Legal) +
  scale_color_grey(start = .1, end = .8) +
  scale_linetype_manual(
    values = c(
      "Cert Date" = "dashed",
      "Argument Date" = "twodash",
      "Decided Date" = "dotted"
    )
  ) +
  xlim(-500, 500) +
  ylim(0, 0.01) +
  labs(title = "Facebook", x = "Difference (Days)", y = "Density") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggsave("./Figures/Figure6.pdf", height = 6, width = 8)

#Figure 7: differencedaysdecisionbygrouptype and differencedaysdecisionbygrouptype_twitter
# Same densities as Figure 5, drawn in one panel per organization type
# (Type.y, the Type column contributed by the joined pairs file); rows with an
# empty Type.y are excluded. No y limit is set here.
data_long %>%
  filter(Type.y != "") %>%
  ggplot(
    aes(x = difference, color = `Difference From`, linetype = `Difference From`)
  ) +
  geom_density(size = 1.2) +
  geom_vline(xintercept = 0) +
  #ggtitle("Difference in Days from Post to Days of \n Different Court Decisions")+
  facet_wrap(~Type.y) +
  scale_color_grey(start = .1, end = .8) +
  scale_linetype_manual(
    values = c(
      "Cert Date" = "dashed",
      "Argument Date" = "twodash",
      "Decided Date" = "dotted"
    )
  ) +
  xlim(-500, 500) +
  labs(title = "Facebook", x = "Difference (Days)", y = "Density") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggsave("./Figures/Figure7.pdf", height = 6, width = 8)

